library(tidyverse)
## -- Attaching packages ----------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
# Crash-report tables, previously cleaned and cached as feather files.
# drivers / non_motorists are per-person tables; incidents is per-crash;
# combo is presumably a joined view of the others — TODO confirm upstream.
drivers <- feather::read_feather("drivers.feather")
incidents <- feather::read_feather("incidents.feather")
non_motorists <- feather::read_feather("non_motorists.feather")
combo <- feather::read_feather("combo.feather")
# Incident coordinates for clustering: keep only points inside the county.
incident_locations <- incidents %>%
  filter(!not_in_county) %>%
  select(longitude, latitude)
# k-means solutions for k = 1..15. kmeans() with nstart = 25 picks random
# starting centers, so fix the RNG seed to make the chosen clusterings
# reproducible across runs/knits.
set.seed(2020)
kmeans_list <- lapply(
  1:15,
  function(k) kmeans(incident_locations, centers = k, nstart = 25, iter.max = 100)
)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 2852800)
# Elbow-plot data: total within-cluster sum of squares for each k.
# vapply (rather than sapply) guarantees a numeric(1) result per fit, and
# iterating kmeans_list directly (seq_along) avoids re-hard-coding 1:15.
centroidss <- data.frame(
  centroids = seq_along(kmeans_list),
  sum_of_squares = vapply(kmeans_list, function(fit) fit$tot.withinss, numeric(1))
)
# Elbow plot: total within-cluster sum of squares against the number of
# centroids, followed by the underlying table.
elbow_plot <- ggplot(centroidss, aes(x = centroids, y = sum_of_squares)) +
  geom_line()
elbow_plot

knitr::kable(centroidss)
| 1 |
805.93049 |
| 2 |
304.84448 |
| 3 |
213.07526 |
| 4 |
172.06474 |
| 5 |
139.81327 |
| 6 |
117.46724 |
| 7 |
99.63681 |
| 8 |
87.33599 |
| 9 |
75.62231 |
| 10 |
66.97816 |
| 11 |
60.93547 |
| 12 |
55.44351 |
| 13 |
50.97869 |
| 14 |
47.61560 |
| 15 |
44.16663 |
# One scatter plot per k-means solution: colour each incident coordinate by
# its assigned cluster. seq_along() tracks the actual length of kmeans_list
# instead of hard-coding 15, and print() is called directly rather than
# piped into (explicit print is required inside a for loop for ggplot).
incident_clusters <- incident_locations
for (k in seq_along(kmeans_list)) {
  # Overwrite the cluster column with this solution's assignments.
  incident_clusters$cluster <-
    as.factor(paste0("cluster", kmeans_list[[k]]$cluster))
  p <- incident_clusters %>%
    ggplot(aes(x = longitude, y = latitude, color = cluster)) +
    geom_point()
  print(p)
}















#' Plot a contingency table of two columns as a shaded 2-d bin chart with
#' the raw counts overlaid as text.
#'
#' Fill is log(n) so a few very large cells do not wash out the colour
#' scale; the legend is suppressed because the printed counts already
#' convey magnitude.
#'
#' @param data A data frame.
#' @param var1,var2 Unquoted column names to cross-tabulate (x and y axes).
#' @return A ggplot object.
contingency_table <- function(data, var1, var2){
  # Capture each column argument once, instead of re-quoting it with
  # enquo() at every one of the six use sites.
  v1 <- enquo(var1)
  v2 <- enquo(var2)
  data %>%
    group_by(!!v1, !!v2) %>%
    tally() %>%
    ggplot() +
    geom_bin2d(aes(x = !!v1, fill = log(n), y = !!v2)) +
    geom_text(aes(x = !!v1, label = n, y = !!v2)) +
    scale_fill_gradient(low = "#ece7f2",
                        high = "#a6bddb") +
    theme(
      panel.grid = element_blank(),
      legend.position = "none",
      panel.background = element_rect(fill = "white")
    )
}
# Pearson chi-squared test: is the ACRS report type independent of the
# weather conditions at the time of the incident?
temp <- chisq.test(
  as.factor(incidents$weather),
  as.factor(incidents$acrs_report_type)
)
knitr::kable(broom::tidy(temp))
| 171.3451 |
0 |
24 |
Pearson’s Chi-squared test |
p-value 3.2496967 × 10^{-24}
contingency_table(incidents, acrs_report_type, weather)

Helmets
# Restrict to bicyclists and collapse the safety-equipment field to a
# two-level helmet / no-helmet indicator (any mention of "helmet",
# case-insensitive, counts as wearing one).
helmet_data <- non_motorists %>%
  filter(as.logical(pedestrian_type_bicyclist)) %>%
  select(safety_equipment, injury_severity) %>%
  mutate(
    safety_equipment = if_else(
      str_detect(tolower(safety_equipment), "helmet"),
      "helmet",
      "no helmet"
    )
  )
# Test whether injury severity is independent of helmet use.
temp <- chisq.test(
  as.factor(helmet_data$safety_equipment),
  as.factor(helmet_data$injury_severity)
)
knitr::kable(broom::tidy(temp))
| 7.365619 |
0.117783 |
4 |
Pearson’s Chi-squared test |
contingency_table(helmet_data, safety_equipment,injury_severity)

Speed Limits
# Injury severities ordered by frequency, so the factor levels run from
# most to least common.
ordered_injury <- drivers %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
# Bucket posted speed limits into three bands (breaks at 31 and 46 mph via
# findInterval) and attach the frequency-ordered severity factor.
speed_bands <- c("Under 35", "35 - 45", "50+")
speed_limit_data <- drivers %>%
  select(speed_limit, injury_severity) %>%
  mutate(
    speed_limit = factor(speed_bands[1 + findInterval(speed_limit, c(31, 46))],
                         levels = speed_bands),
    injury_severity = factor(injury_severity, ordered_injury)
  )
# Test whether injury severity is independent of the speed-limit band.
temp <- chisq.test(
  as.factor(speed_limit_data$speed_limit),
  as.factor(speed_limit_data$injury_severity)
)
knitr::kable(broom::tidy(temp))
| 995.2863 |
0 |
8 |
Pearson’s Chi-squared test |
p-value 1.5543378 × 10^{-209}
contingency_table(speed_limit_data, speed_limit, injury_severity)

library(prophet)
# Number of MCMC samples for prophet's fully Bayesian fit.
mcmc_samples <- 1500
# Daily incident counts in prophet's expected ds/y layout.
daily_counts <- incidents %>%
  count(incident_date) %>%
  rename(ds = incident_date, y = n)
# Zero rows for every calendar day from 2015-01-01 through the last
# observed date, so days with no incidents appear as y = 0 after summing.
all_days <- data.frame(
  ds = seq(as.Date("2015-01-01"), max(daily_counts$ds), by = "d"),
  y = 0
)
temp_data <- bind_rows(daily_counts, all_days) %>%
  group_by(ds) %>%
  summarise(y = sum(y))
# Bayesian prophet model with US holiday effects on the full series;
# fit = FALSE defers fitting until fit.prophet() receives the data.
total_model <- prophet(mcmc.samples = mcmc_samples, fit = FALSE) %>%
  add_country_holidays("US") %>%
  fit.prophet(df = temp_data)
##
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0.003 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 30 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 1: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 1: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 1: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 1: Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 1: Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 1: Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 1: Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 1: Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 1: Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 1: Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 1: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 143.802 seconds (Warm-up)
## Chain 1: 164.334 seconds (Sampling)
## Chain 1: 308.136 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 2: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 2: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 2: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 2: Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 2: Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 2: Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 2: Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 2: Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 2: Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 2: Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 2: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 134.956 seconds (Warm-up)
## Chain 2: 140.744 seconds (Sampling)
## Chain 2: 275.7 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.001 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 3: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 3: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 3: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 3: Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 3: Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 3: Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 3: Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 3: Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 3: Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 3: Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 3: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 164.024 seconds (Warm-up)
## Chain 3: 160.649 seconds (Sampling)
## Chain 3: 324.673 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 0.001 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 4: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 4: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 4: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 4: Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 4: Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 4: Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 4: Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 4: Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 4: Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 4: Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 4: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 149.079 seconds (Warm-up)
## Chain 4: 116.884 seconds (Sampling)
## Chain 4: 265.963 seconds (Total)
## Chain 4:
# Forecast one year past the observed range and inspect the fit:
# overall forecast, decomposed components, and an interactive dygraph.
future <- total_model %>% make_future_dataframe(periods = 365)
total_forecast <- predict(total_model, future)
plot(total_model, total_forecast)

prophet_plot_components(total_model, total_forecast)

dyplot.prophet(total_model,total_forecast)
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
# Daily counts of alcohol-involved incidents (alcohol present or
# contributing for any driver), zero-filled over every calendar day from
# 2015-01-01 through the last observed date.
temp_data <- incidents %>%
  filter(driver_substance_abuse_alcohol_present | driver_substance_abuse_alcohol_contributed) %>%
  count(incident_date) %>%
  rename(ds = incident_date, y = n) %>%
  bind_rows(.,
            data.frame(
              ds = seq(as.Date('2015-01-01'), max(.$ds), by = 'd'),
              y = 0
            )
  ) %>%
  group_by(ds) %>%
  summarise(y = sum(y))
# fit = FALSE defers fitting until fit.prophet(), making this construction
# consistent with how total_model is built earlier in the file.
alcohol_model <- prophet(mcmc.samples = mcmc_samples, fit = FALSE) %>%
  add_country_holidays("US") %>%
  fit.prophet(df = temp_data)
##
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 1).
## Chain 1:
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1:
## Chain 1:
## Chain 1: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 1: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 1: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 1: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 1: Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 1: Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 1: Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 1: Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 1: Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 1: Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 1: Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 1: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 1:
## Chain 1: Elapsed Time: 125.181 seconds (Warm-up)
## Chain 1: 102.083 seconds (Sampling)
## Chain 1: 227.264 seconds (Total)
## Chain 1:
##
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 2).
## Chain 2:
## Chain 2: Gradient evaluation took 0.001 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2:
## Chain 2:
## Chain 2: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 2: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 2: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 2: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 2: Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 2: Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 2: Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 2: Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 2: Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 2: Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 2: Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 2: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 2:
## Chain 2: Elapsed Time: 131.418 seconds (Warm-up)
## Chain 2: 90.105 seconds (Sampling)
## Chain 2: 221.523 seconds (Total)
## Chain 2:
##
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 3).
## Chain 3:
## Chain 3: Gradient evaluation took 0.001 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3:
## Chain 3:
## Chain 3: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 3: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 3: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 3: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 3: Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 3: Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 3: Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 3: Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 3: Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 3: Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 3: Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 3: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 3:
## Chain 3: Elapsed Time: 132.48 seconds (Warm-up)
## Chain 3: 95.794 seconds (Sampling)
## Chain 3: 228.274 seconds (Total)
## Chain 3:
##
## SAMPLING FOR MODEL 'prophet' NOW (CHAIN 4).
## Chain 4:
## Chain 4: Gradient evaluation took 0.001 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4:
## Chain 4:
## Chain 4: Iteration: 1 / 1500 [ 0%] (Warmup)
## Chain 4: Iteration: 150 / 1500 [ 10%] (Warmup)
## Chain 4: Iteration: 300 / 1500 [ 20%] (Warmup)
## Chain 4: Iteration: 450 / 1500 [ 30%] (Warmup)
## Chain 4: Iteration: 600 / 1500 [ 40%] (Warmup)
## Chain 4: Iteration: 750 / 1500 [ 50%] (Warmup)
## Chain 4: Iteration: 751 / 1500 [ 50%] (Sampling)
## Chain 4: Iteration: 900 / 1500 [ 60%] (Sampling)
## Chain 4: Iteration: 1050 / 1500 [ 70%] (Sampling)
## Chain 4: Iteration: 1200 / 1500 [ 80%] (Sampling)
## Chain 4: Iteration: 1350 / 1500 [ 90%] (Sampling)
## Chain 4: Iteration: 1500 / 1500 [100%] (Sampling)
## Chain 4:
## Chain 4: Elapsed Time: 132.758 seconds (Warm-up)
## Chain 4: 97.425 seconds (Sampling)
## Chain 4: 230.183 seconds (Total)
## Chain 4:
# Forecast the alcohol-involved series one year ahead and inspect the fit:
# overall forecast, decomposed components, and an interactive dygraph.
future <- alcohol_model %>% make_future_dataframe(periods = 365)
alcohol_forecast <- predict(alcohol_model, future)
plot(alcohol_model, alcohol_forecast)

prophet_plot_components(alcohol_model, alcohol_forecast)

dyplot.prophet(alcohol_model, alcohol_forecast)